package it.uniroma2.exp.tools;

import it.uniroma2.dtk.dt.DT;
import it.uniroma2.dtk.dt.GenericDT;
import it.uniroma2.util.tree.Tree;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;

/**
 * Command-line tool that converts per-category question-classification data
 * files into two SVM input files, {@code test.svm} and {@code train.svm}.
 * <p>
 * For every positive example (a line starting with {@code 1}) the Penn-style
 * tree found between the {@code |BT|} and {@code |ET|} markers is turned into
 * a vector via {@link DT#dt} and that vector is inserted, in sparse
 * {@code index:value} form, right after the {@code |ET|} marker and followed
 * by an {@code |EV|} marker.
 */
public class QCDataPrep {

	/** Category prefixes; each one has a {@code <PREFIX>_test.dat} and a {@code <PREFIX>_train.dat} file. */
	private static final String[] CATEGORIES = {"ABBR", "DESC", "ENTY", "HUM", "LOC", "NUM"};

	/** Number of mandatory command-line arguments (the random offset is optional). */
	private static final int REQUIRED_ARGS = 6;

	/**
	 * Entry point. Builds the distributed-tree encoder from the arguments and
	 * writes {@code test.svm} and {@code train.svm} into
	 * {@code <output>/<operation>/<vector size>/<lambda>}.
	 * <p>
	 * If the arguments are missing or invalid, or the output folder cannot be
	 * created, the problem is reported on standard error and nothing is written.
	 *
	 * @param args
	 * 0: vector size
	 * 1: lambda
	 * 2: base qc data files folder
	 * 3: base output folder
	 * 4: type of composition operation (PRODUCT, CONVOLUTION)
	 * 5: lexicalized
	 * 6: [random offset] (defaults to 0 when omitted)
	 */
	public static void main(String[] args) {
		if (args == null || args.length < REQUIRED_ARGS) {
			System.err.println("Usage: QCDataPrep <vector size> <lambda> <qc data folder> <output folder> "
					+ "<composition operation> <lexicalized> [random offset]");
			return;
		}

		GenericDT dt;
		File baseFolder;
		File outputFolder;
		try {
			int vectorSize = Integer.parseInt(args[0]);
			boolean lexicalized = Boolean.parseBoolean(args[5]);
			int randomOffset = args.length > 6 ? Integer.parseInt(args[6]) : 0;
			dt = new GenericDT(randomOffset, vectorSize, true, lexicalized, args[4]);
			dt.setLambda(Double.parseDouble(args[1]));
			baseFolder = new File(args[2]);
			outputFolder = new File(args[3]+File.separator+args[4]+File.separator+args[0]+File.separator+args[1]);
		} catch (Exception e) {
			// Without a valid configuration there is nothing sensible to compute:
			// stop here instead of running on with null folders and a null encoder.
			e.printStackTrace();
			return;
		}

		// mkdirs() returns false both on failure and when the directory already
		// exists, hence the isDirectory() checks around it.
		if (!outputFolder.isDirectory() && !outputFolder.mkdirs() && !outputFolder.isDirectory()) {
			System.err.println("Cannot create output folder: " + outputFolder);
			return;
		}

		try {
			System.out.println("Computing test file...");
			writeSvmFile(categoryFiles(baseFolder, "_test.dat"), new File(outputFolder, "test.svm"), dt);
			System.out.println("Computing train file...");
			writeSvmFile(categoryFiles(baseFolder, "_train.dat"), new File(outputFolder, "train.svm"), dt);
		}
		catch (Exception e) {
			e.printStackTrace();
		}
	}

	/** Returns one file per category, named {@code <CATEGORY><suffix>}, inside {@code baseFolder}. */
	private static File[] categoryFiles(File baseFolder, String suffix) {
		File[] files = new File[CATEGORIES.length];
		for (int i = 0; i < CATEGORIES.length; i++) {
			files[i] = new File(baseFolder, CATEGORIES[i] + suffix);
		}
		return files;
	}

	/** Runs {@link #compute} over {@code inputs}, writing to {@code target}; the writer is closed even on failure. */
	private static void writeSvmFile(File[] inputs, File target, DT dt) throws Exception {
		BufferedWriter out = new BufferedWriter(new FileWriter(target));
		try {
			compute(inputs, out, dt);
		} finally {
			out.close();
		}
	}

	/**
	 * Converts the positive examples of each input file and appends them to {@code out}.
	 * <p>
	 * The category is the part of the file name before the first {@code '_'}.
	 * Only lines starting with {@code "1"} are processed; reading of a file
	 * stops at the first other line that follows at least one processed line.
	 * Each output line is the category, a tab, and the input line with its
	 * first character removed and the vector plus {@code " |EV|"} inserted
	 * after {@code "|ET|"}.
	 *
	 * @param files input files, one per category
	 * @param out   destination writer; it is not closed by this method
	 * @param dt    encoder used to turn each parsed tree into a vector
	 * @throws IllegalArgumentException if a file name contains no {@code '_'} or a
	 *         processed line lacks a well-formed {@code |BT| ... |ET|} section
	 * @throws Exception on I/O errors or failures raised by tree parsing/encoding
	 */
	public static void compute(File[] files, BufferedWriter out, DT dt) throws Exception {
		for (File file : files) {
			String fileName = file.getName();
			int separator = fileName.indexOf("_");
			if (separator < 0) {
				throw new IllegalArgumentException("Cannot derive a category from file name without '_': " + fileName);
			}
			String category = fileName.substring(0, separator);
			System.out.println("Loading class "+category);

			BufferedReader in = new BufferedReader(new FileReader(file));
			try {
				int computed = 0;
				int lineNumber = 0;
				String line;
				while ((line = in.readLine()) != null) {
					lineNumber++;
					if (line.startsWith("1")) {
						int treeStart = line.indexOf("|BT|");
						int treeEnd = line.indexOf("|ET|");
						if (treeStart < 0 || treeEnd < treeStart + 4) {
							throw new IllegalArgumentException("Missing or misplaced |BT|/|ET| markers in "
									+ file + " at line " + lineNumber);
						}
						computed++;
						System.out.print(".");
						String tree = line.substring(treeStart + 4, treeEnd).trim();
						String vector = vectorToSvmString(dt.dt(Tree.fromPennTree(tree)));
						String newLine = line.replace("|ET|", "|ET| "+vector+" |EV|");
						// Drop the leading "1" label; the category name takes its place.
						newLine = newLine.substring(1).trim();
						out.write(category+"\t"+newLine);
						out.newLine();
					}
					else if (computed > 0) {
						// Presumably positive examples form one contiguous run per file,
						// so the rest of the file can be skipped - verify against the data format.
						break;
					}
				}
			} finally {
				in.close();
			}
			System.out.println();
		}
	}

	/**
	 * Renders a dense vector in sparse SVM notation.
	 * <p>
	 * Every component that is not equal to zero is emitted as
	 * {@code index:value} with a 1-based index; entries are separated by a
	 * single space. A vector with no such component yields the empty string.
	 *
	 * @param vector the dense vector
	 * @return the space-separated {@code index:value} list
	 */
	public static String vectorToSvmString(double[] vector) {
		// StringBuilder avoids re-copying the whole string for every component.
		StringBuilder result = new StringBuilder();
		for (int i=0; i<vector.length; i++) {
			if (vector[i] != 0) {
				if (result.length() > 0) {
					result.append(' ');
				}
				result.append(i+1).append(':').append(vector[i]);
			}
		}
		return result.toString();
	}

}
